**********Syntax file for Anoll, Allison P., Cindy D. Kam, and Colette Marcellin. "What Does Race Mean?  Racial Disparities in the Public Mind." Journal of Politics**********

*****Analysis with StataSE17*****
*****Note: requires installation of the user-written packages "txttool" and "coefplot" (coefplot is used for Figure 5)*****
*****Note: requires stopword.txt to be placed in directory*****

set scheme s1mono
log using "Anoll Kam Marcellin Bovitz Analyses.smcl", replace

/*Cleaning the data*/
clear
cd "C:\Users\kamcd\Dropbox\CDK WORK\racial disparities essentialism\JOP replication files\"

use "Anoll Kam Marcellin Bovitz June 2022 Survey.dta"

gen newdate = dofc(StartDate)
gen double newtime = hh(StartDate)

//Dropping test data 
drop if newdate<22816
drop if newdate==22816 & newtime==10

//Removing those who have not consented
drop if agree~=1

//Removing those under 18
gen ageyears = age
destring ageyears, force replace
drop if ageyears<18 | ageyears>105
sum ageyears

//Keeping those who pass the attention check
keep if check_closeread_4==1 & check_closeread_5==1

//Take the first respondent of those with duplicate IP addresses
duplicates tag IPAddress, gen(dupIP)
egen IPdups = group(IPAddress) if dupIP>0
egen rank = rank(StartDate), track
bysort IPdups: egen firstrank = min(rank)
drop if firstrank~=rank & dupIP>0

//Sample characteristics
//We ordered 900 White Dems, 900 White Reps, 900 Blacks
//Race
gen White = .
replace White = 1 if race_5==1
replace White = 0 if race_1==1 | race_2==1|race_3==1|race_4==1|race_6==1
tab White

gen Black = .
replace Black = 1 if race_3==1
replace Black = 0 if race_1==1 | race_2==1|race_4==1|race_5==1|race_6==1

gen race = 0
replace race = 1 if White==1
replace race = 2 if Black==1
lab def race 0"Other" 1"White" 2"Black"
lab val race race

//Party
gen REP = 0
replace REP = 1 if strong_rep==1|strong_rep==2|lean==1
gen DEM = 0
replace DEM = 1 if strong_dem==1|strong_dem==2|lean==2
tab1 REP DEM

gen pid3cat = .
replace pid3cat = 0 if REP==1
replace pid3cat = 1 if DEM==1
replace pid3cat = .5 if lean==3
tab pid3cat
lab def pid3cat 0 "REP" 1 "DEM"
lab val pid3cat pid3cat
tab pid3cat race

//Finalizing sample
keep if (White==1 & REP==1)|(White==1 & DEM==1)|(Black==1)
gen samplegroups = .
replace samplegroups = 1 if White==1 & REP==1
replace samplegroups = 2 if White==1 & DEM==1
replace samplegroups = 3 if Black==1
lab def samplegroups 1"White REP" 2"White DEM" 3"Black"
lab val samplegroups samplegroups
tab samplegroups

/*Treatment*/
gen expcond = .
replace expcond = 1 if treatment =="gene"
replace expcond = 2 if treatment =="neutral"
replace expcond = 3 if treatment =="ses"
lab def expcond 1 "Genetic Frame" 2"Neutral" 3"SES Frame"
lab val expcond expcond

recode expcond (1=1)(2 3=0)(else=.), gen(genecond)
recode expcond (3=1)(1 2=0)(else=.), gen(sescond)
lab var genecond "Genetic Framing"
lab var sescond "SES Framing"


//MANIPULATION CHECK//
//open-ends (check_article)
//any appearance of gene/genetic/genetics
//vs. any appearance of socioeconomic/SES/socio/econ
gen comment_lower = lower(check_article)
*replace period and commas with space
replace comment_lower = subinstr(comment_lower, ".", " ",.)
replace comment_lower = subinstr(comment_lower, ",", " ",.)
set more off
foreach word in gene genes heredity inherit genetic genetics genetically counseling biology  {
gen `word'=1 if strpos(comment_lower," `word' ")>0
}
egen anygeneopen=anymatch(gene-biology), val(1)
tab anygeneopen expcond, col chi2
foreach word in socio social socioeconomic status economy economic disadvantage disadvantages therapy SES {
gen `word'=1 if strpos(comment_lower," `word' ")>0
}
egen anySESopen  =anymatch(socio-SES), val(1)
tab anySESopen expcond, col chi2


//BALANCE CHECK
recode gender (1=0)(2=1)(else=.), gen(female)
recode educ (3 5=0)(6=.25)(7=.5)(8=.75)(9 10=1)(else=.), gen(ed5cat)
recode income (1=0)(4=.2)(5=.4)(6=.6)(8=.8)(10 12=1)(else=.), gen(faminc6)
lab var ed5cat "Education"
lab var female "Female"
lab var ageyears "Age"
lab var pid3cat "Party ID"
lab var faminc6 "Income"
lab var expcond "Experimental Condition"
hotelling ageyear female ed5cat faminc6 pid3cat Black if sescond~=1, by(genecond)
hotelling ageyear female ed5cat faminc6 pid3cat Black if genecond~=1, by(sescond)

table (var) (expcond), statistic(mean ageyear female ed5cat faminc6 pid3cat Black) statistic(sd ageyear female ed5cat faminc6 pid3cat Black) statistic(count expcond) nformat(%9.2f mean sd) nototals
collect levelsof result
collect style header result, level(hide)
collect title "Means, SD, N per Experimental Condition"
collect preview


/*DVs*/
recode bat1_health (1=0 "not at all")(2=.33)(3=.67)(4=1 "A great deal")(else=.), gen(genes_health)
recode bat2_health (1=0 "not at all")(2=.33)(3=.67)(4=1 "A great deal")(else=.), gen(ses_health)

recode bat1_edu (1=0 "not at all")(2=.33)(3=.67)(4=1 "A great deal")(else=.), gen(genes_edu)
recode bat2_edu (1=0 "not at all")(2=.33)(3=.67)(4=1 "A great deal")(else=.), gen(ses_edu)

recode bat1_crime (1=0 "not at all")(2=.33)(3=.67)(4=1 "A great deal")(else=.), gen(genes_crime)
recode bat2_crime (1=0 "not at all")(2=.33)(3=.67)(4=1 "A great deal")(else=.), gen(ses_crime)

gen genes_index = (genes_health+genes_edu+genes_crime)/3
gen ses_index = (ses_health+ses_edu+ses_crime)/3
alpha genes_health genes_edu genes_crime
alpha ses_health ses_edu ses_crime
corr genes_index ses_index

**********TABLE 1**********
foreach v of varlist genes_index genes_health genes_edu genes_crime ses_index ses_health ses_edu ses_crime {
reg `v' genecond sescond
est store `v'
}
est table genes_index genes_health genes_edu genes_crime ses_index ses_health ses_edu ses_crime,  b(%9.2f) se stats(N) style(col) eq(1)
est table genes_index genes_health genes_edu genes_crime ses_index ses_health ses_edu ses_crime,  b(%9.2f) star(.05 .01 .001) stats(N) style(col) eq(1)

/*Testing equivalence across conditions*/
suest genes_health genes_edu
test [genes_health_mean]genecond=[genes_edu_mean]genecond
//reporting one-tailed p-values which require halving the two-tailed p-value reported
disp .0611/2

suest genes_health genes_crime
test [genes_health_mean]genecond=[genes_crime_mean]genecond
//reporting one-tailed p-values which require halving the two-tailed p-value reported
disp .1178/2

//calculating effect size
//difference of means/sd 
ttest genes_index if sescond==0, by(genecond)
disp -.0489863/.2757905

ttest genes_health if sescond==0, by(genecond)
disp -.0677174/.3000686


*****HETEROGENEITY*****
**********START FIGURE 5**********
*****PLACE FULL RESULTS IN APPENDIX*****
forval j=1/3 {
foreach v of varlist genes_index genes_health genes_edu genes_crime ses_index ses_health ses_edu ses_crime {
reg `v' genecond sescond if samplegroups==`j'
est store `v'_`j'
}
}
forval j=1/3 {
est table genes_index_`j' genes_health_`j' genes_edu_`j' genes_crime_`j' ses_index_`j' ses_health_`j' ses_edu_`j' ses_crime_`j', b(%9.2f) se stats(N) style(col) eq(1)
est table genes_index_`j' genes_health_`j' genes_edu_`j' genes_crime_`j' ses_index_`j' ses_health_`j' ses_edu_`j' ses_crime_`j', b(%9.2f) star(.05 .01 .001) stats(N) style(col) eq(1)
}

coefplot (genes_index, mcolor(black) mfcolor(white) lcolor(gray)) (genes_index_2, mcolor(blue) lcolor(blue)) (genes_index_3, mcolor(black) lcolor(black)) (genes_index_1, mcolor(red)) , legend(label(2 "All") label(4 "White DEM") label(6 "Black") label(8 "White GOP") row(4) position(3)) xline(0) title("DV: Genes Index") name(genes_index, replace) keep(genecond sescond)

coefplot (ses_index, mcolor(black) mfcolor(white) lcolor(gray)) (ses_index_2, mcolor(blue) lcolor(blue)) (ses_index_3, mcolor(black) lcolor(black)) (ses_index_1, mcolor(red)) , legend(label(2 "All") label(4 "White DEM") label(6 "Black") label(8 "White GOP") row(4) position(3)) xline(0) title("DV: SES Index") name(ses_index, replace) keep(genecond sescond)

graph combine genes_index ses_index, ycommon xcommon ysize(6) xsize(12)
graph export Figure5.pdf, replace
**********END FIGURE 5**********

*test for difference in effect size
suest genes_index_1 genes_index_2
test [genes_index_1_mean]genecond=[genes_index_2_mean]genecond
suest genes_index_1 genes_index_3
test [genes_index_1_mean]genecond=[genes_index_3_mean]genecond


**********FOOTNOTE & APPENDIX
//timing
sum treat_timer_First_Click treat_timer_Last_Click treat_timer_Page_Submit
table expcond samplegroups, stat(mean treat_timer_Page_Submit) nformat(%9.2f)

//word length of surprise recall
gen wordcount = wordcount(comment_lower)
table expcond samplegroups, stat(mean wordcount) nformat(%9.2f)
reg wordcount i.expcond i.samplegroups
reg wordcount i.expcond i.samplegroups ageyear female ed5cat faminc6 pid3cat


**********UNPACKING SES CONDITION**********
*****Footnote 25*****
//in each condition, top words
//what words are uniquely predictive of one condition versus the others
preserve
keep comment_lower expcond
save openends.dta, replace
restore

* txttool is the command, commenttext is the variable to be used, gen(cleaned) creates a new variable with all lower case text
* bagwords breaks out every word into its own variable

use openends.dta, clear
preserve
keep if expcond ==1
save openends_Genetic.dta, replace
restore
preserve
keep if expcond ==2
save openends_Neutral.dta, replace
restore
preserve
keep if expcond ==3
save openends_SES.dta, replace
restore

foreach v in Genetic Neutral SES {
use openends_`v'.dta, clear
txttool comment_lower, gen(cleaned_open) bagwords stem stopwords(stopword.txt)
save "cleaned_open_`v'", replace
}

//looking at the new dataset of bag of words
foreach v in Genetic Neutral SES {
use cleaned_open_`v', clear
foreach j of varlist w_* {
egen mean`j' = mean(`j')
}
keep mean*

save "means_open_`v'", replace
}

use  "means_open_Genetic", clear
drop if _n>1
order _all, sequential
drop meanw_2-meanw_1968
rename meanw_long renamew_llong
rename meanw_* *
xpose, clear varname format(%6.2f)
sum v1, det
egen top50=pctile(v1), p(50)
egen top25=pctile(v1), p(75)
keep if v1>top25
sum
sum v1, det
graph hbar v1 if v1>.02, over(_varname, sort(1) label(labsize(vsmall))) title("Top Words" "Genetic Condition") ytitle(" ") name(Genetic, replace)

use  "means_open_Neutral", clear
drop if _n>1
order _all, sequential
drop meanw_99-meanw_400
rename meanw_* *
xpose, clear varname format(%6.2f)
sum v1, det
egen top50=pctile(v1), p(50)
egen top25=pctile(v1), p(75)
keep if v1>top25
sum
sum v1, det
graph hbar v1 if v1>.02, over(_varname, sort(1) label(labsize(vsmall))) title("Top Words" "Neutral Condition") ytitle(" ") name(Neutral, replace)

use  "means_open_SES", clear
drop if _n>1
order _all, sequential
drop meanw_9-meanw_99 
rename meanw_long renamew_llong 
rename meanw_* *
xpose, clear varname format(%6.2f)
sum v1, det
egen top50=pctile(v1), p(50)
egen top25=pctile(v1), p(75)
keep if v1>top25
sum
sum v1, det
graph hbar v1 if v1>.02, over(_varname, sort(1) label(labsize(vsmall))) title("Top Words" "SES Condition") ytitle(" ") name(SES, replace)

graph combine Genetic Neutral SES, ycommon col(3) imargin(tiny)

//next, identify distinctive words
//pr(mention a given word|condition)
//what are the words where "condition" is most significant?
//DV: prevalence of the word
//IV: condition (dataset)
//stack dataset so that each line represents a different condition

use  "means_open_Genetic", clear
drop if _n>1
order _all, sequential
drop meanw_2-meanw_1968
rename meanw_long renamew_llong
rename meanw_* *
xpose, clear varname format(%6.2f)
keep if v1>.02
rename v1 Genetic
rename _varname word
save "top_words_Genetic", replace

use  "means_open_Neutral", clear
drop if _n>1
order _all, sequential
drop meanw_99-meanw_400
rename meanw_* *
xpose, clear varname format(%6.2f)
keep if v1>.02
rename v1 Neutral
rename _varname word
save "top_words_Neutral", replace

use  "means_open_SES", clear
drop if _n>1
order _all, sequential
drop meanw_9-meanw_99
rename meanw_long renamew_llong
rename meanw_* *
xpose, clear varname format(%6.2f)
keep if v1>.02
rename v1 SES
rename _varname word
save "top_words_SES", replace

use top_words_Genetic, clear
merge 1:1 word using top_words_Neutral
merge 1:1 word using top_words_SES, gen(_merge2)

gen Gen_Only = .
replace Gen_Only =1 if Genetic>0 & Genetic<1 & Neutral==.& SES==.
gen Neutral_Only =.
replace Neutral_Only =1 if Neutral>0 & Neutral<1 & Genetic==. & SES==.
gen SES_Only =.
replace SES_Only =1 if SES>0 & SES<1 & Genetic==. & Neutral==.
egen numberconditions = rownonmiss(Genetic Neutral SES)

//words that are unique to each condition 
tab word if Gen_Only==1
tab word if Neutral_Only==1
tab word if SES_Only==1

*****cleaning up directory*****
foreach v in SES Genetic Neutral {
	erase openends_`v'.dta
	erase cleaned_open_`v'.dta
	erase means_open_`v'.dta
	erase top_words_`v'.dta
}
erase openends.dta

log close

translate "Anoll Kam Marcellin Bovitz Analyses.smcl" "Anoll Kam Marcellin Bovitz Analyses.pdf"
